Harmonies#

%load_ext autoreload
%autoreload 2
import os
from collections import Counter
from statistics import mean

import dimcat as dc
import ms3
import pandas as pd
from dimcat import plotting
from dimcat.utils import grams, make_transition_matrix
import utils

pd.set_option("display.max_rows", 1000)
pd.set_option("display.max_columns", 500)
RESULTS_PATH = os.path.abspath(os.path.join(utils.OUTPUT_FOLDER, "harmonies"))
os.makedirs(RESULTS_PATH, exist_ok=True)


def make_output_path(
    filename: str,
    extension=None,
    path=RESULTS_PATH,
) -> str:
    return utils.make_output_path(filename=filename, extension=extension, path=path)


def save_figure_as(
    fig, filename, formats=("png", "pdf"), directory=RESULTS_PATH, **kwargs
):
    if formats is not None:
        for fmt in formats:
            plotting.write_image(fig, filename, directory, format=fmt, **kwargs)
    else:
        plotting.write_image(fig, filename, directory, **kwargs)

Loading data

D = utils.get_dataset("couperin_concerts", corpus_release="v2.2")
D
Dataset
=======
{'inputs': {'basepath': None,
            'packages': {'couperin_concerts': ["'couperin_concerts.measures' (MuseScoreMeasures)",
                                               "'couperin_concerts.notes' (MuseScoreNotes)",
                                               "'couperin_concerts.expanded' (MuseScoreHarmonies)",
                                               "'couperin_concerts.chords' (MuseScoreChords)",
                                               "'couperin_concerts.metadata' (Metadata)"]}},
 'outputs': {'basepath': None, 'packages': {}},
 'pipeline': []}

All labels

labels = D.get_feature("harmonylabels")
labels
mc mn quarterbeats duration_qb mc_onset mn_onset timesig staff voice volta label pedal numeral form figbass changes relativeroot cadence phraseend chord_type chord_tones added_tones root bass_note alt_label globalkey_is_minor localkey_is_minor globalkey_mode localkey_mode localkey_resolved localkey_and_mode root_roman relativeroot_resolved effective_localkey effective_localkey_resolved effective_localkey_is_minor pedal_resolved chord_and_mode chord_reduced chord_reduced_and_mode applied_to_numeral numeral_or_applied_to_numeral intervals_over_bass intervals_over_root scale_degrees scale_degrees_and_mode scale_degrees_major scale_degrees_minor globalkey localkey chord
corpus piece i
couperin_concerts c01n01_prelude 0 1 0 0 2.00 0 1/2 4/4 1 1 <NA> G.I{ <NA> I <NA> <NA> <NA> <NA> <NA> { M (0, 4, 1) () 0 0 <NA> False False major major I I, major I NaN I I False <NA> I, major I I, major <NA> I (M3, P5) (M3, P5) (1, 3, 5) (1, 3, 5), major (1, 3, 5) (1, #3, 5) G I I
1 2 1 2 2.00 0 0 4/4 1 1 <NA> V <NA> V <NA> <NA> <NA> <NA> <NA> <NA> M (1, 5, 2) () 1 1 <NA> False False major major I I, major V NaN I I False <NA> V, major V V, major <NA> V (M3, P5) (M3, P5) (5, 7, 2) (5, 7, 2), major (5, 7, 2) (5, #7, 2) G I V
2 2 1 4 0.50 1/2 1/2 4/4 1 1 <NA> I6 <NA> I <NA> 6 <NA> <NA> <NA> <NA> M (4, 1, 0) () 0 4 <NA> False False major major I I, major I NaN I I False <NA> I6, major I6 I6, major <NA> I (m3, m6) (M3, P5) (3, 5, 1) (3, 5, 1), major (3, 5, 1) (#3, 5, 1) G I I6
3 2 1 9/2 0.50 5/8 5/8 4/4 1 1 <NA> I <NA> I <NA> <NA> <NA> <NA> <NA> <NA> M (0, 4, 1) () 0 0 <NA> False False major major I I, major I NaN I I False <NA> I, major I I, major <NA> I (M3, P5) (M3, P5) (1, 3, 5) (1, 3, 5), major (1, 3, 5) (1, #3, 5) G I I
4 2 1 5 0.75 3/4 3/4 4/4 1 1 <NA> V(4) <NA> V <NA> <NA> 4 <NA> <NA> <NA> M (1, 0, 2) () 1 1 <NA> False False major major I I, major V NaN I I False <NA> V(4), major V V, major <NA> V (P4, P5) (P4, P5) (5, 1, 2) (5, 1, 2), major (5, 1, 2) (5, 1, 2) G I V(4)
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
parnasse_07 230 52 52 411/2 0.25 3/8 3/8 4/4 1 1 <NA> i64 <NA> i <NA> 64 <NA> <NA> <NA> <NA> m (1, 0, -3) () 0 1 <NA> True True minor minor i i, minor i NaN i i True <NA> i64, minor i64 i64, minor <NA> i (P4, m6) (m3, P5) (5, 1, 3) (5, 1, 3), minor (5, 1, b3) (5, 1, 3) b i i64
231 52 52 823/4 0.25 7/16 7/16 4/4 1 1 <NA> iio64 <NA> ii o 64 <NA> <NA> <NA> <NA> o (-4, 2, -1) () 2 -4 <NA> True True minor minor i i, minor ii NaN i i True <NA> iio64, minor iio64 iio64, minor <NA> ii (a4, M6) (m3, d5) (6, 2, 4) (6, 2, 4), minor (b6, 2, 4) (6, 2, 4) b i iio64
232 52 52 206 1.00 1/2 1/2 4/4 1 1 <NA> i6 <NA> i <NA> 6 <NA> <NA> <NA> <NA> m (-3, 1, 0) () 0 -3 <NA> True True minor minor i i, minor i NaN i i True <NA> i6, minor i6 i6, minor <NA> i (M3, M6) (m3, P5) (3, 5, 1) (3, 5, 1), minor (b3, 5, 1) (3, 5, 1) b i i6
233 52 52 207 1.00 3/4 3/4 4/4 1 1 <NA> V <NA> V <NA> <NA> <NA> <NA> <NA> <NA> M (1, 5, 2) () 1 1 <NA> True True minor minor i i, minor V NaN i i True <NA> V, minor V V, minor <NA> V (M3, P5) (M3, P5) (5, #7, 2) (5, #7, 2), minor (5, 7, 2) (5, #7, 2) b i V
234 53 53 208 4.00 0 0 4/4 1 1 <NA> i|PAC} <NA> i <NA> <NA> <NA> <NA> PAC } m (0, -3, 1) () 0 0 <NA> True True minor minor i i, minor i NaN i i True <NA> i, minor i i, minor <NA> i (m3, P5) (m3, P5) (1, 3, 5) (1, 3, 5), minor (1, b3, 5) (1, 3, 5) b i i

8376 rows × 51 columns

metadata = D.get_metadata()
is_annotated_mask = metadata.label_count > 0
is_annotated_index = dc.PieceIndex(metadata.index[is_annotated_mask])
annotated_notes = D.get_feature("notes").subselect(is_annotated_index)
print(f"The annotated pieces have {len(annotated_notes)} notes.")
The annotated pieces have 35542 notes.

Delete @none labels This creates progressions between the label before and after the @none label that might not actually be perceived as transitions!

df = utils.remove_none_labels(labels.df)
Length before: 8376
There are 0 @none labels which we are going to delete.
Length after: 8376

Delete non-chord labels (typically, phrase labels)

df = utils.remove_non_chord_labels(df)
Length before: 8376
There are 0 non-chord labels which we are going to delete:
Series([], Name: count, dtype: Int64)
Length after: 8376
key_region_groups, key_region2key = ms3.adjacency_groups(df.localkey)
df["key_regions"] = key_region_groups

Unigrams#

k = 25
df.chord.value_counts().iloc[:k]
chord
V        1175
I         965
i         763
V65       470
I6        420
V7        413
i6        395
V43       303
ii%65     233
IV        212
V2        177
ii65      176
V6        154
V(4)      130
ii%43     128
vi        123
iv        112
ii        108
IV6       102
iv6        69
i64        69
V(64)      69
ii7        65
ii6        58
VI         57
Name: count, dtype: Int64
font_dict = {"font": {"size": 20}}
H_LAYOUT = utils.STD_LAYOUT.copy()
H_LAYOUT.update(
    {
        "legend": dict(
            {"orientation": "h", "itemsizing": "constant", "x": -0.05}, **font_dict
        )
    }
)
fig = utils.plot_cum(
    df.chord,
    x_log=True,
    markersize=4,
    left_range=(-0.03, 3.7),
    right_range=(-0.01, 1.11),
    **H_LAYOUT,
)
save_figure_as(fig, "chord_label_unigram_distribution")
fig

Unigrams in major segments#

minor, major = df[df.localkey_is_minor], df[~df.localkey_is_minor]
print(
    f"{len(major)} tokens ({len(major.chord.unique())} types) in major and {len(minor)} "
    f"({len(minor.chord.unique())} types) in minor."
)
4138 tokens (119 types) in major and 4238 (169 types) in minor.
major.chord.value_counts().iloc[:k]
chord
I        906
V        615
I6       416
V65      232
V7       228
IV       165
ii65     159
V43      131
vi       122
V6       117
ii        89
IV6       83
V(4)      81
V2        79
ii7       52
ii6       41
V(64)     40
I64       34
vi7       33
V43/V     32
ii43      30
V65/V     28
vi6       28
iii6      24
IVM7      22
Name: count, dtype: Int64
fig = utils.plot_cum(
    major.chord,
    x_log=True,
    markersize=4,
    left_range=(-0.03, 3.7),
    right_range=(-0.01, 1.11),
    **H_LAYOUT,
)
save_figure_as(fig, "chord_label_unigram_distribution_in_major")
fig.show()

Unigrams in minor segments#

print(
    f"{len(major)} tokens ({len(major.chord.unique())} types) in major and {len(minor)} "
    f"({len(minor.chord.unique())} types) in minor."
)
4138 tokens (119 types) in major and 4238 (169 types) in minor.
minor.chord.value_counts().iloc[:k]
chord
i         761
V         560
i6        394
V65       238
ii%65     230
V7        185
V43       172
ii%43     128
iv        112
V2         98
iv6        69
i64        69
I          59
VI         57
V(4)       49
VIM7       47
IV         47
V65/iv     45
v6         43
V6         37
ii%7       34
iio        33
III        32
V(64)      29
iv7        26
Name: count, dtype: Int64
fig = utils.plot_cum(
    minor.chord,
    x_log=True,
    markersize=4,
    left_range=(-0.03, 3.7),
    right_range=(-0.01, 1.11),
    **H_LAYOUT,
)
save_figure_as(fig, "chord_label_unigram_distribution_in_minor")
fig.show()

Bigrams#

chord_successions = [s.to_list() for _, s in df.groupby("key_regions").chord]
gs = grams(chord_successions)
c = Counter(gs)
dict(sorted(c.items(), key=lambda a: a[1], reverse=True)[:k])
{('V', 'I'): 256,
 ('V7', 'I'): 206,
 ('V', 'i'): 196,
 ('I', 'V'): 167,
 ('i', 'V'): 154,
 ('V65', 'I'): 153,
 ('V', 'V7'): 144,
 ('V7', 'i'): 124,
 ('ii%65', 'V'): 123,
 ('V65', 'i'): 123,
 ('V', 'I6'): 119,
 ('V65', 'V7'): 111,
 ('V(4)', 'V'): 101,
 ('ii65', 'V'): 89,
 ('I6', 'I'): 86,
 ('V43', 'i'): 85,
 ('V43', 'I'): 84,
 ('V2', 'i6'): 83,
 ('i6', 'V43'): 82,
 ('V', 'i6'): 81,
 ('I', 'I6'): 74,
 ('V2', 'I6'): 67,
 ('I6', 'ii65'): 64,
 ('V', 'V2'): 64,
 ('i6', 'ii%65'): 63}

Absolute Counts (read from index to column)#

make_transition_matrix(chord_successions, k=k, distinct_only=True)
I i V V7 I6 i6 V43 ii65 V2 ii%65 V65 IV V6 ii vi V(4) ii%43 iv v6 VI IV6 ii7 IVM7 IV64 iii6
V 256 196 0 144 119 81 11 0 64 2 56 0 24 11 5 0 9 1 5 1 16 0 0 1 14
V7 206 124 2 0 2 5 1 0 1 0 9 0 0 0 14 0 0 0 0 8 2 0 0 0 0
I 0 3 167 30 74 0 22 37 8 0 46 49 46 1 40 32 0 0 0 0 11 4 14 14 1
i 1 0 154 3 0 55 25 3 9 40 59 15 14 0 0 13 21 11 24 17 2 1 0 0 0
V65 153 123 15 111 2 9 6 0 3 0 0 0 1 0 1 0 1 0 0 0 0 0 0 0 0
ii%65 0 9 123 4 0 16 1 0 7 0 2 0 0 0 0 20 6 0 0 0 0 2 0 0 0
V(4) 0 0 101 20 0 0 0 0 0 0 1 0 2 0 0 0 0 0 0 0 0 0 0 0 1
ii65 6 1 89 6 3 0 0 0 8 0 0 0 1 3 0 20 0 0 0 0 2 8 0 0 1
I6 86 0 32 1 0 0 62 64 1 0 10 35 10 1 13 15 0 0 0 0 4 15 2 0 0
V43 84 85 4 3 25 45 0 0 2 0 32 0 2 0 0 0 0 0 0 0 0 0 0 1 0
V2 2 3 1 0 67 83 7 0 0 0 2 0 1 0 0 0 0 0 0 0 0 0 0 0 1
i6 0 55 23 1 0 0 82 4 2 63 41 4 3 0 0 7 4 7 0 9 0 0 0 0 0
IV6 4 0 2 5 5 0 1 1 0 0 55 6 3 3 0 0 0 0 0 0 0 2 0 0 2
V(64) 1 0 54 8 0 0 0 0 0 1 0 1 0 0 0 2 0 0 0 0 0 0 0 0 0
ii%43 0 0 43 9 0 5 3 0 0 45 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0
IV 11 0 10 0 25 0 24 3 4 0 8 0 1 41 1 2 0 1 0 0 6 14 1 0 0
ii 0 0 38 13 0 1 0 0 1 0 12 4 7 0 5 0 0 0 0 0 3 2 0 0 0
vi 3 0 1 0 21 0 1 31 1 2 8 17 0 0 0 0 0 0 0 0 9 0 2 0 1
VIM7 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 31 0 0 0 0 0 0 0 0
V43/V 0 0 31 0 0 0 0 0 0 0 0 0 0 0 0 3 0 0 0 0 0 1 0 0 0
ii7 1 0 29 10 6 0 4 0 0 0 5 0 0 0 0 0 0 0 0 0 0 0 0 0 0
V65/iv 0 0 0 0 0 0 0 0 0 0 0 3 0 0 0 0 0 29 0 0 0 0 0 0 0
V6 23 10 26 4 9 3 0 0 1 0 8 0 0 0 9 0 0 0 6 0 1 0 0 0 1
VI 0 0 0 0 0 2 0 6 0 21 0 0 0 0 0 0 0 5 0 0 0 0 0 0 0
i64 0 2 0 1 0 5 4 0 17 5 1 0 0 0 0 0 20 0 0 0 0 0 0 0 0

Normalized Counts#

make_transition_matrix(
    chord_successions, k=k, distinct_only=True, normalize=True, decimals=2
)
I i V V7 I6 i6 V43 ii65 V2 ii%65 V65 IV V6 ii vi V(4) ii%43 iv v6 VI IV6 ii7 IVM7 IV64 iii6
V 0.23 0.17 0.00 0.13 0.11 0.07 0.01 0.00 0.06 0.00 0.05 0.00 0.02 0.01 0.00 0.00 0.01 0.00 0.00 0.00 0.01 0.00 0.00 0.00 0.01
V7 0.50 0.30 0.00 0.00 0.00 0.01 0.00 0.00 0.00 0.00 0.02 0.00 0.00 0.00 0.03 0.00 0.00 0.00 0.00 0.02 0.00 0.00 0.00 0.00 0.00
I 0.00 0.00 0.24 0.04 0.10 0.00 0.03 0.05 0.01 0.00 0.07 0.07 0.07 0.00 0.06 0.05 0.00 0.00 0.00 0.00 0.02 0.01 0.02 0.02 0.00
i 0.00 0.00 0.25 0.00 0.00 0.09 0.04 0.00 0.01 0.07 0.10 0.02 0.02 0.00 0.00 0.02 0.03 0.02 0.04 0.03 0.00 0.00 0.00 0.00 0.00
V65 0.33 0.26 0.03 0.24 0.00 0.02 0.01 0.00 0.01 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
ii%65 0.00 0.04 0.53 0.02 0.00 0.07 0.00 0.00 0.03 0.00 0.01 0.00 0.00 0.00 0.00 0.09 0.03 0.00 0.00 0.00 0.00 0.01 0.00 0.00 0.00
V(4) 0.00 0.00 0.79 0.16 0.00 0.00 0.00 0.00 0.00 0.00 0.01 0.00 0.02 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.01
ii65 0.03 0.01 0.51 0.03 0.02 0.00 0.00 0.00 0.05 0.00 0.00 0.00 0.01 0.02 0.00 0.11 0.00 0.00 0.00 0.00 0.01 0.05 0.00 0.00 0.01
I6 0.21 0.00 0.08 0.00 0.00 0.00 0.15 0.15 0.00 0.00 0.02 0.08 0.02 0.00 0.03 0.04 0.00 0.00 0.00 0.00 0.01 0.04 0.00 0.00 0.00
V43 0.28 0.28 0.01 0.01 0.08 0.15 0.00 0.00 0.01 0.00 0.11 0.00 0.01 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
V2 0.01 0.02 0.01 0.00 0.38 0.47 0.04 0.00 0.00 0.00 0.01 0.00 0.01 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.01
i6 0.00 0.14 0.06 0.00 0.00 0.00 0.21 0.01 0.01 0.16 0.10 0.01 0.01 0.00 0.00 0.02 0.01 0.02 0.00 0.02 0.00 0.00 0.00 0.00 0.00
IV6 0.04 0.00 0.02 0.05 0.05 0.00 0.01 0.01 0.00 0.00 0.54 0.06 0.03 0.03 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.02 0.00 0.00 0.02
V(64) 0.01 0.00 0.78 0.12 0.00 0.00 0.00 0.00 0.00 0.01 0.00 0.01 0.00 0.00 0.00 0.03 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
ii%43 0.00 0.00 0.34 0.07 0.00 0.04 0.02 0.00 0.00 0.35 0.01 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.01 0.00 0.00 0.00
IV 0.06 0.00 0.05 0.00 0.13 0.00 0.12 0.02 0.02 0.00 0.04 0.00 0.01 0.21 0.01 0.01 0.00 0.01 0.00 0.00 0.03 0.07 0.01 0.00 0.00
ii 0.00 0.00 0.36 0.12 0.00 0.01 0.00 0.00 0.01 0.00 0.11 0.04 0.07 0.00 0.05 0.00 0.00 0.00 0.00 0.00 0.03 0.02 0.00 0.00 0.00
vi 0.03 0.00 0.01 0.00 0.19 0.00 0.01 0.28 0.01 0.02 0.07 0.15 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.08 0.00 0.02 0.00 0.01
VIM7 0.00 0.00 0.04 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.65 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
V43/V 0.00 0.00 0.89 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.09 0.00 0.00 0.00 0.00 0.00 0.03 0.00 0.00 0.00
ii7 0.02 0.00 0.45 0.15 0.09 0.00 0.06 0.00 0.00 0.00 0.08 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
V65/iv 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.07 0.00 0.00 0.00 0.00 0.00 0.64 0.00 0.00 0.00 0.00 0.00 0.00 0.00
V6 0.16 0.07 0.18 0.03 0.06 0.02 0.00 0.00 0.01 0.00 0.05 0.00 0.00 0.00 0.06 0.00 0.00 0.00 0.04 0.00 0.01 0.00 0.00 0.00 0.01
VI 0.00 0.00 0.00 0.00 0.00 0.04 0.00 0.13 0.00 0.46 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.11 0.00 0.00 0.00 0.00 0.00 0.00 0.00
i64 0.00 0.03 0.00 0.01 0.00 0.07 0.06 0.00 0.25 0.07 0.01 0.00 0.00 0.00 0.00 0.00 0.29 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00

Entropy#

make_transition_matrix(
    chord_successions, k=k, IC=True, distinct_only=True, smooth=1, decimals=2
)
I i V V7 I6 i6 V43 ii65 V2 ii%65 V65 IV V6 ii vi V(4) ii%43 iv v6 VI IV6 ii7 IVM7 IV64 iii6
V 2.40 2.79 10.41 3.23 3.50 4.05 6.83 10.41 4.39 8.83 4.58 10.41 5.77 6.83 7.83 10.41 7.09 9.41 7.83 9.41 6.32 10.41 10.41 9.41 6.50
V7 1.64 2.37 7.75 9.34 7.75 6.75 8.34 9.34 8.34 9.34 6.02 9.34 9.34 9.34 5.43 9.34 9.34 9.34 9.34 6.17 7.75 9.34 9.34 9.34 9.34
I 9.88 7.88 2.49 4.93 3.65 9.88 5.36 4.63 6.71 9.88 4.33 4.24 4.33 8.88 4.52 4.84 9.88 9.88 9.88 9.88 6.30 7.56 5.97 5.97 8.88
i 8.72 9.72 2.44 7.72 9.72 3.91 5.02 7.72 6.40 4.36 3.81 5.72 5.81 9.72 9.72 5.91 5.26 6.14 5.08 5.55 8.14 8.72 9.72 9.72 9.72
V65 2.19 2.51 5.46 2.65 7.87 6.14 6.65 9.46 7.46 9.46 9.46 9.46 8.46 9.46 8.46 9.46 8.46 9.46 9.46 9.46 9.46 9.46 9.46 9.46 9.46
ii%65 8.88 5.55 1.92 6.55 8.88 4.79 7.88 8.88 5.88 8.88 7.29 8.88 8.88 8.88 8.88 4.48 6.07 8.88 8.88 8.88 8.88 7.29 8.88 8.88 8.88
V(4) 8.51 8.51 1.84 4.12 8.51 8.51 8.51 8.51 8.51 8.51 7.51 8.51 6.93 8.51 8.51 8.51 8.51 8.51 8.51 8.51 8.51 8.51 8.51 8.51 7.51
ii65 5.88 7.69 2.19 5.88 6.69 8.69 8.69 8.69 5.52 8.69 8.69 8.69 7.69 6.69 8.69 4.29 8.69 8.69 8.69 8.69 7.10 5.52 8.69 8.69 7.69
I6 2.90 9.34 4.30 8.34 9.34 9.34 3.37 3.32 8.34 9.34 5.88 4.17 5.88 8.34 5.54 5.34 9.34 9.34 9.34 9.34 7.02 5.34 7.76 9.34 9.34
V43 2.67 2.65 6.75 7.08 4.38 3.55 9.08 9.08 7.49 9.08 4.03 9.08 7.49 9.08 9.08 9.08 9.08 9.08 9.08 9.08 9.08 9.08 9.08 8.08 9.08
V2 7.11 6.69 7.69 8.69 2.60 2.30 5.69 8.69 8.69 8.69 7.11 8.69 7.69 8.69 8.69 8.69 8.69 8.69 8.69 8.69 8.69 8.69 8.69 8.69 7.69
i6 9.29 3.49 4.71 8.29 9.29 9.29 2.92 6.97 7.71 3.29 3.90 6.97 7.29 9.29 9.29 6.29 6.97 6.29 9.29 5.97 9.29 9.29 9.29 9.29 9.29
IV6 6.08 8.41 6.82 5.82 5.82 8.41 7.41 7.41 8.41 8.41 2.60 5.60 6.41 6.41 8.41 8.41 8.41 8.41 8.41 8.41 8.41 6.82 8.41 8.41 6.82
V(64) 7.26 8.26 2.48 5.09 8.26 8.26 8.26 8.26 8.26 7.26 8.26 7.26 8.26 8.26 8.26 6.67 8.26 8.26 8.26 8.26 8.26 8.26 8.26 8.26 8.26
ii%43 8.51 8.51 3.05 5.19 8.51 5.93 6.51 8.51 8.51 2.99 7.51 8.51 8.51 8.51 8.51 8.51 8.51 8.51 8.51 8.51 8.51 7.51 8.51 8.51 8.51
IV 5.17 8.75 5.29 8.75 4.05 8.75 4.11 6.75 6.43 8.75 5.58 8.75 7.75 3.36 7.75 7.17 8.75 7.75 8.75 8.75 5.94 4.84 7.75 8.75 8.75
ii 8.42 8.42 3.13 4.61 8.42 7.42 8.42 8.42 7.42 8.42 4.72 6.10 5.42 8.42 5.83 8.42 8.42 8.42 8.42 8.42 6.42 6.83 8.42 8.42 8.42
vi 6.45 8.45 7.45 8.45 3.99 8.45 7.45 3.45 7.45 6.86 5.28 4.28 8.45 8.45 8.45 8.45 8.45 8.45 8.45 8.45 5.13 8.45 6.86 8.45 7.45
VIM7 8.15 8.15 6.57 8.15 8.15 8.15 8.15 8.15 8.15 8.15 8.15 8.15 8.15 8.15 8.15 8.15 3.15 8.15 8.15 8.15 8.15 8.15 8.15 8.15 8.15
V43/V 8.09 8.09 3.09 8.09 8.09 8.09 8.09 8.09 8.09 8.09 8.09 8.09 8.09 8.09 8.09 6.09 8.09 8.09 8.09 8.09 8.09 7.09 8.09 8.09 8.09
ii7 7.24 8.24 3.33 4.78 5.43 8.24 5.92 8.24 8.24 8.24 5.65 8.24 8.24 8.24 8.24 8.24 8.24 8.24 8.24 8.24 8.24 8.24 8.24 8.24 8.24
V65/iv 8.14 8.14 8.14 8.14 8.14 8.14 8.14 8.14 8.14 8.14 8.14 6.14 8.14 8.14 8.14 8.14 8.14 3.23 8.14 8.14 8.14 8.14 8.14 8.14 8.14
V6 4.00 5.12 3.83 6.26 5.26 6.58 8.58 8.58 7.58 8.58 5.41 8.58 8.58 8.58 5.26 8.58 8.58 8.58 5.77 8.58 7.58 8.58 8.58 8.58 7.58
VI 8.14 8.14 8.14 8.14 8.14 6.56 8.14 5.34 8.14 3.69 8.14 8.14 8.14 8.14 8.14 8.14 8.14 5.56 8.14 8.14 8.14 8.14 8.14 8.14 8.14
i64 8.26 6.67 8.26 7.26 8.26 5.67 5.94 8.26 4.09 5.67 7.26 8.26 8.26 8.26 8.26 8.26 3.87 8.26 8.26 8.26 8.26 8.26 8.26 8.26 8.26

Minor vs. Major#

region_is_minor = (
    df.groupby("key_regions")
    .localkey_is_minor.unique()
    .map(lambda values: values[0])
    .to_dict()
)
region_key = (
    df.groupby("key_regions").localkey.unique().map(lambda values: values[0]).to_dict()
)
key_chords = {
    ix: s.to_list()
    for ix, s in df.reset_index().groupby(["piece", "key_regions"]).chord
}
major, minor = [], []
for chords, is_minor in zip(key_chords.values(), region_is_minor.values()):
    (major, minor)[is_minor].append(chords)
make_transition_matrix(major, k=k, distinct_only=True, normalize=True)
I i V V7 I6 V43 i6 ii65 V65 ii%65 V6 V2 vi IV V(4) ii iv ii%43 v6 IV6 VI ii43 V(64) iii6 ii2
V 0.218220 0.177966 0.000000 0.135593 0.118644 0.010593 0.067797 0.000000 0.044492 0.000000 0.021186 0.046610 0.006356 0.000000 0.000000 0.008475 0.000000 0.008475 0.004237 0.021186 0.000000 0.002119 0.000000 0.012712 0.00000
V7 0.448864 0.318182 0.005682 0.000000 0.005682 0.000000 0.017045 0.000000 0.022727 0.000000 0.000000 0.005682 0.045455 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.005682 0.034091 0.000000 0.022727 0.000000 0.00000
i 0.000000 0.000000 0.302326 0.000000 0.000000 0.062016 0.096899 0.003876 0.100775 0.054264 0.015504 0.015504 0.000000 0.023256 0.007752 0.000000 0.019380 0.034884 0.042636 0.000000 0.034884 0.000000 0.000000 0.000000 0.00000
V65 0.339806 0.252427 0.033981 0.233010 0.004854 0.024272 0.029126 0.000000 0.000000 0.000000 0.000000 0.000000 0.004854 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.00000
I 0.000000 0.003378 0.236486 0.040541 0.118243 0.040541 0.000000 0.057432 0.064189 0.000000 0.074324 0.003378 0.067568 0.060811 0.050676 0.000000 0.000000 0.000000 0.000000 0.020270 0.000000 0.003378 0.000000 0.000000 0.02027
ii%65 0.000000 0.030612 0.622449 0.010204 0.000000 0.010204 0.020408 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.091837 0.000000 0.000000 0.030612 0.000000 0.000000 0.000000 0.000000 0.071429 0.000000 0.00000
V43 0.281046 0.241830 0.019608 0.000000 0.091503 0.000000 0.137255 0.000000 0.117647 0.000000 0.013072 0.006536 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.00000
V(4) 0.000000 0.000000 0.796296 0.185185 0.000000 0.000000 0.000000 0.000000 0.018519 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.00000
ii65 0.012048 0.012048 0.445783 0.048193 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.048193 0.000000 0.000000 0.144578 0.036145 0.000000 0.000000 0.000000 0.024096 0.000000 0.096386 0.060241 0.000000 0.00000
I6 0.190476 0.000000 0.074074 0.005291 0.000000 0.179894 0.000000 0.164021 0.026455 0.000000 0.010582 0.005291 0.037037 0.084656 0.031746 0.005291 0.000000 0.000000 0.000000 0.015873 0.000000 0.005291 0.015873 0.000000 0.00000
i6 0.000000 0.143750 0.043750 0.000000 0.000000 0.206250 0.000000 0.012500 0.118750 0.162500 0.006250 0.006250 0.000000 0.006250 0.018750 0.000000 0.025000 0.012500 0.000000 0.000000 0.025000 0.000000 0.006250 0.000000 0.00000
V2 0.013889 0.013889 0.013889 0.000000 0.402778 0.069444 0.416667 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.00000
IV6 0.055556 0.000000 0.000000 0.055556 0.037037 0.018519 0.000000 0.000000 0.537037 0.000000 0.055556 0.000000 0.000000 0.018519 0.000000 0.055556 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.037037 0.018519 0.00000
V(64) 0.000000 0.000000 0.777778 0.074074 0.000000 0.000000 0.000000 0.000000 0.000000 0.037037 0.000000 0.000000 0.000000 0.000000 0.037037 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.00000
ii%43 0.000000 0.000000 0.358491 0.075472 0.000000 0.056604 0.037736 0.000000 0.018868 0.377358 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.018868 0.000000 0.00000
vi 0.017857 0.000000 0.000000 0.000000 0.250000 0.017857 0.000000 0.303571 0.071429 0.017857 0.000000 0.000000 0.000000 0.071429 0.000000 0.000000 0.000000 0.000000 0.000000 0.107143 0.000000 0.000000 0.000000 0.017857 0.00000
ii 0.000000 0.000000 0.404762 0.071429 0.000000 0.000000 0.023810 0.000000 0.095238 0.000000 0.047619 0.023810 0.047619 0.023810 0.000000 0.000000 0.000000 0.000000 0.000000 0.047619 0.000000 0.000000 0.000000 0.000000 0.00000
ii7 0.000000 0.000000 0.520000 0.120000 0.040000 0.040000 0.000000 0.000000 0.120000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.040000 0.000000 0.00000
IV 0.025000 0.000000 0.012500 0.000000 0.162500 0.137500 0.000000 0.025000 0.037500 0.000000 0.000000 0.025000 0.000000 0.000000 0.012500 0.162500 0.012500 0.000000 0.000000 0.050000 0.000000 0.012500 0.000000 0.000000 0.00000
VI 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.038462 0.076923 0.000000 0.461538 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.192308 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.00000
V6 0.203390 0.101695 0.135593 0.033898 0.016949 0.000000 0.016949 0.000000 0.050847 0.000000 0.000000 0.016949 0.084746 0.000000 0.000000 0.000000 0.000000 0.000000 0.033898 0.016949 0.000000 0.016949 0.000000 0.000000 0.00000
V65/iv 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.150000 0.000000 0.000000 0.550000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.00000
VIM7 0.000000 0.000000 0.050000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.550000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.00000
V43/V 0.000000 0.000000 0.909091 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.090909 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.00000
V7/IV 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.833333 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.00000
make_transition_matrix(minor, k=k, distinct_only=True, normalize=True)
I i V V7 I6 V43 i6 V2 V65 ii%65 ii65 IV ii V6 vi iv V(4) ii%43 v6 IVM7 i64 IV64 V(64) III VI
V 0.229779 0.161765 0.000000 0.123162 0.108456 0.011029 0.082721 0.077206 0.062500 0.001838 0.000000 0.000000 0.011029 0.022059 0.001838 0.000000 0.000000 0.009191 0.005515 0.000000 0.009191 0.001838 0.000000 0.000000 0.001838
V7 0.592593 0.280423 0.005291 0.000000 0.005291 0.005291 0.010582 0.000000 0.026455 0.000000 0.000000 0.000000 0.000000 0.000000 0.021164 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.005291
I 0.000000 0.002882 0.233429 0.037464 0.103746 0.025937 0.000000 0.011527 0.074928 0.000000 0.040346 0.077810 0.002882 0.069164 0.054755 0.000000 0.040346 0.000000 0.000000 0.025937 0.000000 0.025937 0.023055 0.000000 0.000000
V65 0.311688 0.277056 0.030303 0.264069 0.004329 0.004329 0.012987 0.012987 0.000000 0.000000 0.000000 0.000000 0.000000 0.004329 0.000000 0.000000 0.000000 0.004329 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
i 0.003509 0.000000 0.217544 0.003509 0.000000 0.028070 0.091228 0.007018 0.112281 0.080702 0.007018 0.024561 0.000000 0.028070 0.000000 0.014035 0.035088 0.042105 0.042105 0.000000 0.010526 0.000000 0.010526 0.007018 0.028070
ii%65 0.000000 0.051724 0.448276 0.017241 0.000000 0.000000 0.112069 0.034483 0.017241 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.094828 0.025862 0.000000 0.000000 0.043103 0.000000 0.051724 0.000000 0.000000
V(4) 0.000000 0.000000 0.838710 0.145161 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
I6 0.227723 0.000000 0.084158 0.000000 0.000000 0.138614 0.000000 0.000000 0.024752 0.000000 0.148515 0.094059 0.000000 0.039604 0.024752 0.000000 0.024752 0.000000 0.000000 0.004950 0.000000 0.000000 0.024752 0.000000 0.000000
i6 0.000000 0.153061 0.056122 0.005102 0.000000 0.229592 0.000000 0.005102 0.107143 0.163265 0.010204 0.010204 0.000000 0.010204 0.000000 0.015306 0.020408 0.010204 0.000000 0.000000 0.000000 0.000000 0.020408 0.000000 0.020408
ii65 0.051282 0.000000 0.538462 0.025641 0.038462 0.000000 0.000000 0.038462 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.102564 0.000000 0.000000 0.000000 0.000000 0.000000 0.064103 0.000000 0.000000
V43 0.276119 0.298507 0.007463 0.014925 0.074627 0.000000 0.179104 0.007463 0.097015 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.022388 0.000000 0.000000 0.000000 0.000000
V2 0.011765 0.023529 0.000000 0.000000 0.388235 0.023529 0.458824 0.000000 0.023529 0.000000 0.000000 0.000000 0.000000 0.011765 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
V(64) 0.025641 0.000000 0.769231 0.153846 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.025641 0.000000 0.000000 0.000000 0.000000 0.025641 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
IV 0.061856 0.000000 0.051546 0.000000 0.113402 0.113402 0.000000 0.010309 0.051546 0.000000 0.010309 0.000000 0.278351 0.010309 0.010309 0.000000 0.010309 0.000000 0.000000 0.000000 0.000000 0.000000 0.010309 0.000000 0.000000
ii%43 0.000000 0.000000 0.328125 0.031250 0.000000 0.000000 0.046875 0.000000 0.000000 0.375000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.140625 0.000000 0.015625 0.000000 0.000000
IV6 0.023810 0.000000 0.023810 0.023810 0.071429 0.000000 0.000000 0.000000 0.547619 0.000000 0.023810 0.119048 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
ii 0.000000 0.000000 0.372549 0.078431 0.000000 0.000000 0.000000 0.000000 0.137255 0.000000 0.000000 0.039216 0.000000 0.098039 0.058824 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.019608 0.000000 0.000000
V43/V 0.000000 0.000000 0.863636 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.090909 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
V6 0.125000 0.050000 0.200000 0.025000 0.100000 0.000000 0.025000 0.000000 0.062500 0.000000 0.000000 0.000000 0.000000 0.000000 0.050000 0.000000 0.000000 0.000000 0.050000 0.000000 0.025000 0.000000 0.000000 0.000000 0.000000
V65/iv 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.761905 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
ii7 0.030303 0.000000 0.454545 0.151515 0.121212 0.030303 0.000000 0.000000 0.060606 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.060606 0.000000 0.000000
VIM7 0.000000 0.000000 0.047619 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.666667 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
vi 0.040816 0.000000 0.020408 0.000000 0.142857 0.000000 0.000000 0.000000 0.081633 0.020408 0.265306 0.224490 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
ii6 0.000000 0.000000 0.413793 0.000000 0.103448 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.103448 0.000000 0.000000 0.000000 0.034483 0.034483 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
i64 0.000000 0.000000 0.000000 0.031250 0.000000 0.062500 0.156250 0.250000 0.000000 0.062500 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.312500 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000

Chord progressions without suspensions#

Here called plain chords, which consist only of numeral, inversion figures, and relative keys.

df["plain_chords"] = (
    df.numeral + df.figbass.fillna("") + ("/" + df.relativeroot).fillna("")
)
df.plain_chords.iloc[:k]
corpus             piece           i 
couperin_concerts  c01n01_prelude  0          I
                                   1          V
                                   2         I6
                                   3          I
                                   4          V
                                   5          V
                                   6          I
                                   7         vi
                                   8         I6
                                   9         V6
                                   10    vi43/V
                                   11     ii7/V
                                   12     V43/V
                                   13         V
                                   14        ii
                                   15     #vii7
                                   16      III7
                                   17        i6
                                   18      ii65
                                   19       V64
                                   20        V7
                                   21         I
                                   22         V
                                   23       I64
                                   24        V2
Name: plain_chords, dtype: string

Consecutive identical labels are merged

def remove_subsequent_identical(col):
    return col[col != col.shift()].to_list()


key_regions_plain_chords = (
    df.reset_index()
    .groupby(["piece", "key_regions"])
    .plain_chords.apply(remove_subsequent_identical)
)
key_plain_chords = {ix: s for ix, s in key_regions_plain_chords.items()}
major_plain, minor_plain = [], []
for chords, is_minor in zip(key_plain_chords.values(), region_is_minor.values()):
    (major_plain, minor_plain)[is_minor].append(chords)
plain_chords_per_segment = {k: len(v) for k, v in key_plain_chords.items()}
print(
    f"The local key segments have {sum(plain_chords_per_segment.values())} 'plain chords' without immediate "
    f"repetitions, yielding {len(grams(list(key_plain_chords.values())))} bigrams.\n{sum(map(len, major_plain))} "
    f"chords are in major, {sum(map(len, minor_plain))} in minor."
)
The local key segments have 7521 'plain chords' without immediate repetitions, yielding 6956 bigrams.
3217 chords are in major, 3573 in minor.
{
    segment: chord_count
    for segment, chord_count in list(
        {
            (
                piece,
                region_key[key] + (" minor" if region_is_minor[key] else " major"),
            ): v
            for (piece, key), v in plain_chords_per_segment.items()
        }.items()
    )[:k]
}
{('c01n01_prelude', 'I major'): 42,
 ('c01n01_prelude', 'ii minor'): 7,
 ('c01n01_prelude', 'V major'): 6,
 ('c01n01_prelude', 'IV major'): 5,
 ('c01n02_allemande', 'I major'): 20,
 ('c01n02_allemande', 'V major'): 21,
 ('c01n02_allemande', 'vi minor'): 10,
 ('c01n03_sarabande', 'i minor'): 15,
 ('c01n03_sarabande', 'III major'): 3,
 ('c01n03_sarabande', 'VII major'): 2,
 ('c01n03_sarabande', 'v minor'): 14,
 ('c01n04_gavotte', 'i minor'): 20,
 ('c01n04_gavotte', 'III major'): 7,
 ('c01n05_gigue', 'I major'): 46,
 ('c01n05_gigue', 'V major'): 2,
 ('c01n05_gigue', 'vi minor'): 15,
 ('c01n05_gigue', 'IV major'): 2,
 ('c01n05_gigue', 'ii minor'): 5,
 ('c01n06_menuet_en_trio', 'i minor'): 14,
 ('c01n06_menuet_en_trio', 'III major'): 8,
 ('c02n01_prelude', 'I major'): 17,
 ('c02n01_prelude', 'V major'): 4,
 ('c02n02_allemande_fuguee', 'I major'): 28,
 ('c02n02_allemande_fuguee', 'V major'): 5,
 ('c02n02_allemande_fuguee', 'IV major'): 13}
print(
    f"Segments being in the same local key have a mean length of {round(mean(plain_chords_per_segment.values()), 2)} "
    f"plain chords."
)
Segments being in the same local key have a mean length of 13.29 plain chords.

Most frequent 3-, 4-, and 5-grams in major#

utils.sorted_gram_counts(major_plain, 3)
N = 548
count %
progression
(V, V7, I) 41 7.48
(ii65, V, V7) 32 5.84
(I, V, I) 29 5.29
(i, V, i) 29 5.29
(ii65, V, i) 27 4.93
(ii65, V, I) 27 4.93
(I6, V43, I) 26 4.74
(i6, ii65, V) 26 4.74
(V65, i, V) 25 4.56
(V, V7, i) 25 4.56
(ii43, ii65, V) 22 4.01
(V65, V7, i) 22 4.01
(V65, I, V) 21 3.83
(i6, V43, i) 20 3.65
(I6, ii65, V) 18 3.28
(IV6, V65, I) 18 3.28
(V, i, V) 18 3.28
(V, I6, I) 17 3.10
(V43, I, V) 17 3.10
(i, V65, i) 17 3.10
(I6, I, V) 15 2.74
(vi, ii65, V) 15 2.74
(i, V, V7) 15 2.74
(V43, i, V) 14 2.55
(I, V65, I) 12 2.19
utils.sorted_gram_counts(major_plain, 4)
N = 207
count %
progression
(ii65, V, V7, I) 16 7.73
(ii65, V, V7, i) 15 7.25
(V, I6, I, V) 11 5.31
(i6, ii65, V, i) 11 5.31
(i, V65, i, V) 9 4.35
(IV, I6, V43, I) 9 4.35
(I, V, V7, I) 9 4.35
(I6, ii65, V, I) 9 4.35
(V, I6, V43, I) 8 3.86
(vi, ii65, V, I) 8 3.86
(V65, i, V, i) 8 3.86
(V, i, V65, i) 8 3.86
(i, V, V7, i) 8 3.86
(IV, V43, I, V) 7 3.38
(V43, i, V65, i) 7 3.38
(V43, I, V, I) 7 3.38
(V43, i, V, i) 7 3.38
(IV6, V65, I, V) 7 3.38
(i6, ii65, V, V7) 7 3.38
(I6, I, V, I) 6 2.90
(V2, i6, ii65, V) 6 2.90
(V43, i6, ii65, V) 6 2.90
(i, V65, V7, i) 6 2.90
(i6, V65, i, V) 6 2.90
(V65, i, V, V7) 6 2.90
utils.sorted_gram_counts(major_plain, 5)
N = 106
count %
progression
(i6, ii65, V, V7, i) 6 5.66
(V, I6, I, V, I) 5 4.72
(i, V, i, V65, i) 5 4.72
(I6, I, V, V7, I) 5 4.72
(vi, ii65, V, V7, I) 5 4.72
(ii65, V, i6, V43, i) 5 4.72
(i6, V43, i, V, i) 5 4.72
(I6, ii65, V, V7, I) 4 3.77
(I6, IV, V43, I, V) 4 3.77
(V2, i6, ii65, V, i) 4 3.77
(V43, i, V65, i, V) 4 3.77
(I, IV, I6, V43, I) 4 3.77
(V65, V7, I, V65, V7) 4 3.77
(V7, I, V65, V7, I) 4 3.77
(V65, i, V, i, V65) 4 3.77
(V, i, V65, i, V) 4 3.77
(V, I6, I, V, V7) 4 3.77
(VI, ii65, V, V7, i) 4 3.77
(V65, V7, i, ii65, V) 4 3.77
(i, V, i, V, i) 4 3.77
(V, i6, V43, i, V) 4 3.77
(V65, i, i6, ii65, V) 4 3.77
(i6, V43, i, V65, V7) 4 3.77
(I, V, I6, V43, I) 3 2.83
(I, IV7, ii65, V2, I6) 3 2.83

Most frequent 3-, 4-, and 5-grams in minor#

utils.sorted_gram_counts(minor_plain, 3)
N = 602
count %
progression
(V, V7, I) 57 9.47
(I, V, I) 44 7.31
(ii65, V, V7) 37 6.15
(i, V, i) 34 5.65
(ii65, V, I) 30 4.98
(i6, V43, i) 30 4.98
(V65, V7, I) 29 4.82
(V65, i, V) 28 4.65
(V65, I, V) 26 4.32
(V, V7, i) 24 3.99
(ii43, ii65, V) 23 3.82
(ii65, V, i) 23 3.82
(V65, V7, i) 20 3.32
(I6, V43, I) 20 3.32
(i6, ii65, V) 19 3.16
(I6, ii65, V) 17 2.82
(i, V65, i) 17 2.82
(V, I6, I) 16 2.66
(V, I, V) 16 2.66
(V43, I, V) 16 2.66
(I6, I, V) 16 2.66
(I, V, V7) 15 2.49
(V, i6, V43) 15 2.49
(i, ii65, V) 15 2.49
(V, V2, i6) 15 2.49
utils.sorted_gram_counts(minor_plain, 4)
N = 237
count %
progression
(ii65, V, V7, I) 23 9.70
(I, V, V7, I) 14 5.91
(ii65, V, V7, i) 13 5.49
(V65, I, V, I) 13 5.49
(vi, ii65, V, I) 11 4.64
(V65, i, V, i) 11 4.64
(V, i6, V43, i) 10 4.22
(V2, i6, V43, i) 10 4.22
(V43, i, V, i) 9 3.80
(i, ii65, V, i) 9 3.80
(i6, V43, i, V) 9 3.80
(i, V65, i, V) 9 3.80
(I6, V43, I, V) 9 3.80
(V, i, V65, i) 8 3.38
(ii65, i6, ii65, i6) 8 3.38
(I6, V, V7, I) 8 3.38
(I, V65, I, V) 7 2.95
(V65, V7, i, V) 7 2.95
(I, V65, V7, I) 7 2.95
(i6, V43, i, V65) 7 2.95
(i6, ii65, V, i) 7 2.95
(I6, ii65, V, I) 7 2.95
(I6, I, V, I) 7 2.95
(I6, ii65, V, V7) 7 2.95
(ii43, ii65, V, V7) 7 2.95
utils.sorted_gram_counts(minor_plain, 5)
N = 121
count %
progression
(i6, V43, i, V, i) 8 6.61
(I6, ii65, V, V7, I) 7 5.79
(V, I, V, V7, I) 6 4.96
(ii65, i6, ii65, i6, ii65) 6 4.96
(i6, ii65, i6, ii65, i6) 6 4.96
(V65, V7, I, V65, V7) 5 4.13
(V7, I, V65, V7, I) 5 4.13
(I, V65, V7, I, V65) 5 4.13
(I, ii65, V, V7, I) 5 4.13
(V, i6, V43, i, V) 5 4.13
(i, V, i, V65, i) 5 4.13
(V, i, V65, i, V) 5 4.13
(I, V7, I, V7, I) 5 4.13
(i6, ii65, V, V7, i) 4 3.31
(I, IV64, I, V, I) 4 3.31
(ii65, V, i6, V43, i) 4 3.31
(V, V2, i6, V43, i) 4 3.31
(V65, V7, i, ii65, V) 4 3.31
(I, V, V65, I, V) 4 3.31
(I, V, I, V43/V, V) 4 3.31
(IV, ii, V6, V, I6) 4 3.31
(IV, I6, V43, I, V) 4 3.31
(i6, V43, i6, ii65, V) 4 3.31
(i, V65, i, V, i) 4 3.31
(ii43, ii65, V, V7, I) 4 3.31

Counting particular progressions#

MEMORY = {}
chord_progressions = list(key_plain_chords.values())


def look_for(n_gram):
    n = len(n_gram)
    if n in MEMORY:
        n_grams = MEMORY[n]
    else:
        n_grams = grams(chord_progressions, n)
        MEMORY[n] = n_grams
    matches = n_grams.count(n_gram)
    total = len(n_grams)
    return f"{matches} ({round(100*matches/total, 3)} %)"
look_for(("i", "v6"))
'17 (0.244 %)'
look_for(("i", "v6", "iv6"))
'5 (0.078 %)'
look_for(("i", "v6", "iv6", "V"))
'3 (0.051 %)'
look_for(("i", "V6", "v6"))
'1 (0.016 %)'
look_for(("V", "IV6", "V65"))
'15 (0.235 %)'

Chord progressions preceding phrase endings#

def phraseending_progressions(df, n=3, k=k):
    selector = (
        df.groupby(level=0, group_keys=False)
        .phraseend.apply(lambda col: col.notna().shift().fillna(True))
        .cumsum()
    )
    print(f"{selector.max()} phrases overall.")
    phraseends = (
        df.groupby(selector)
        .apply(lambda df: df.chord.iloc[-n:].reset_index(drop=True))
        .unstack()
    )
    return (
        phraseends.groupby(phraseends.columns.to_list())
        .size()
        .sort_values(ascending=False)
        .iloc[:k]
    )
phraseending_progressions(df)
1145 phrases overall.
0      1      2
V      V7     I    75
V(4)   V      I    51
V      V7     i    42
V(64)  V      I    29
I      V      I    28
ii65   V      I    25
V(4)   V      i    24
ii%65  V      i    22
V65    i      V    20
i      V      i    20
V(64)  V      i    17
V65    I      V    14
V43    I      V    12
V(4)   V7     I    12
V7     i      V    10
ii%43  ii%65  V     9
i(4)   i      V     9
VIM7   ii%43  V     9
V      i      V     9
V65    V7     I     9
V43    i      V     8
ii%65  V      I     8
vi     ii65   V     6
V(4)   V7     i     6
I(4)   I      V     6
dtype: int64
phraseending_progressions(df, 4)
1145 phrases overall.
0      1      2   3
ii65   V      V7  I    23
ii%65  V      V7  i    20
I      V      V7  I    15
ii65   V(4)   V   I    15
I      V(4)   V   I    15
vi     ii65   V   I    14
i      V      V7  i    11
ii%65  V(4)   V   i    11
V43    I      V   I    10
I6     V      V7  I     9
V43    i      V   i     9
i      V(4)   V   i     8
I6     V(4)   V   I     8
ii%65  V(64)  V   i     8
i      ii%65  V   i     7
I      ii65   V   I     7
ii%65  V      V7  I     6
I6     V(64)  V   I     6
V65    i(4)   i   V     6
I      V(64)  V   I     6
V65    V7     i   V     6
ii65   V(64)  V   I     6
IV     V43    I   V     6
ii65   V      V7  i     5
I      V(4)   V7  I     5
dtype: int64